To narrow the problem some, we look at only ndt.unified_downloads on the new platform, although we include partial parameterization to support expantion to both the old new platform as well as additional tables.
All "flows" are row counts. In most cases they are rows with proper unique UUIDs (or more precisely number of nuique UUIDs meeting a particular critera.
In [1]:
import re
import pandas as pd
import BQhelper as bq
import inspectTable
bq.project = "mlab-sandbox"
In [2]:
mainViewQ="""
WITH
stage1 AS (
SELECT
1 AS key,
COUNT (*) AS numRows,
COUNTIF ({UUID} IS NULL OR {UUID} LIKE ( "" )) AS numMissingUUID
FROM {productTable}
WHERE {whereClause}
),
stage2 AS (
SELECT
1 AS key,
COUNT(*) AS numHaveUUID,
COUNT(*) - COUNT( DISTINCT {UUID} ) AS numDupUUID,
COUNT( DISTINCT {UUID} ) AS numUUID,
COUNTIF (filter.isValidBest) AS numBest,
COUNTIF (NOT filter.isValidBest) AS numNotBest
FROM {productTable}
WHERE {UUID} IS NOT NULL
AND {UUID} NOT LIKE ( "" )
AND {whereClause}
)
SELECT * FROM stage1 JOIN stage2 USING (key)
"""
res=bq.DataFrameQuery(mainViewQ, productTable='measurement-lab.library.ndt_unified_ndt5_uploads', UUID='a.UUID',
whereClause='test_date="2020-01-10"')
print (res)
In [ ]: